# Load required libraries
library(dplyr)
library(lme4)
library(moments)
library(modelsummary)
library(broom.mixed)

# Load data
data <- readRDS("tablesm1_data.rds")
# Define services in desired order
services <- c("Waste_treatment", "Waste_collection", "Transport", "Fire", "Libraries",
              "Civil_protection", "Drinking_water", "Sewer")

# Data transformation function
transform_skewed_data <- function(data) {
  transform_variable <- function(x) {
    skew <- skewness(x, na.rm = TRUE)
    
    if (abs(skew) > 1) {
      if (skew > 0) {
        if (min(x, na.rm = TRUE) > 0) {
          x_transformed <- log(x)
        } else {
          x_shifted <- x - min(x, na.rm = TRUE) + 1
          x_transformed <- log(x_shifted)
        }
      } else {
        x_transformed <- exp(scale(x))
      }
      return(as.vector(scale(x_transformed)))
    } else {
      return(as.vector(scale(x)))
    }
  }
  
  data %>%
    mutate(
      population = transform_variable(population),
      population_squared = transform_variable(I(population^2)),
      VOTANTS_PERCENT = transform_variable(VOTANTS_PERCENT),
      EUROS_HABITANT = transform_variable(EUROS_HABITANT),
      Densitat..hab..km.. = transform_variable(Densitat..hab..km..),
      year = factor(year)
    )
}

# Model fitting function
fit_mixed_model <- function(service_name, data) {
  service_data <- data %>% filter(service == service_name)
  
  tryCatch({
    model <- glmer(imc_dummy ~ population + I(population^2) + Densitat..hab..km.. + 
                     VOTANTS_PERCENT + EUROS_HABITANT + 
                     (1 | year) + (1 | muni),
                   data = service_data,
                   family = binomial,
                   control = glmerControl(
                     optimizer = "bobyqa",
                     optCtrl = list(maxfun = 2e4),
                     check.conv.grad = .makeCC("warning", 0.2),
                     check.conv.singular = .makeCC(action = "warning", tol = 1e-4)
                   ))
    
    if(isSingular(model)) {
      cat("Singular fit for", service_name, "\n")
    }
    
    return(model)
  }, error = function(e) {
    cat("Error in", service_name, "model:\n")
    print(e)
    return(NULL)
  })
}

# VIF calculation function
calculate_vif <- function(model, service_name) {
  tryCatch({
    X <- model.matrix(model, type = "fixed")
    X <- X[, -1, drop = FALSE]  # Remove intercept
    
    if (ncol(X) > 1) {
      cor_matrix <- cor(X)
      vif_values <- diag(solve(cor_matrix))
      names(vif_values) <- colnames(X)
      return(vif_values)
    } else {
      return(c("Only one predictor" = NA))
    }
  }, error = function(e) {
    cat("Error calculating VIF for", service_name, ":\n")
    print(e)
    return(NULL)
  })
}

# Model diagnostics function
check_model_issues <- function(models) {
  issues <- lapply(names(models), function(name) {
    model <- models[[name]]
    if (!is.null(model)) {
      singular <- isSingular(model)
      conv <- model@optinfo$conv$lme4$messages
      c(Singular = singular,
        Convergence = !is.null(conv),
        Messages = if(!is.null(conv)) paste(conv, collapse="; ") else "None")
    }
  })
  names(issues) <- names(models)
  return(do.call(rbind, issues))
}

# MAIN ANALYSIS
cat("=== MIXED EFFECTS MODELS: Population + Population² + Density ===\n")

# Transform data
data_transformed <- transform_skewed_data(data)

# Check data quality
cat("Missing values:\n")
cat("  Population:", sum(is.na(data_transformed$population)), "\n")
cat("  Density:", sum(is.na(data_transformed$Densitat..hab..km..)), "\n")

# Fit models for each service
available_services <- unique(data_transformed$service)
service_names <- services[services %in% available_services]

cat("\nFitting models for services:", paste(service_names, collapse = ", "), "\n")

models <- setNames(
  lapply(service_names, function(x) fit_mixed_model(x, data_transformed)),
  service_names
)

# Remove failed models
valid_models <- models[!sapply(models, is.null)]

# Calculate VIF for each model
cat("\n=== VARIANCE INFLATION FACTORS ===\n")
vif_results <- list()

for (service_name in names(valid_models)) {
  cat("\n", service_name, ":\n")
  model <- valid_models[[service_name]]
  vif_values <- calculate_vif(model, service_name)
  
  if (!is.null(vif_values)) {
    vif_results[[service_name]] <- vif_values
    print(round(vif_values, 3))
    
    # Flag problematic VIF values
    high_vif <- vif_values[vif_values > 5 & !is.na(vif_values)]
    if (length(high_vif) > 0) {
      cat("  *** High VIF (>5):", names(high_vif), "\n")
    }
  }
}

# VIF summary
if (length(vif_results) > 0) {
  vif_df <- do.call(rbind, lapply(names(vif_results), function(service) {
    vif_vals <- vif_results[[service]]
    data.frame(
      Service = service,
      Variable = names(vif_vals),
      VIF = round(vif_vals, 3),
      stringsAsFactors = FALSE
    )
  }))
  
  cat("\nVIF Summary:\n")
  cat("  Mean VIF:", round(mean(vif_df$VIF, na.rm = TRUE), 3), "\n")
  cat("  Max VIF:", round(max(vif_df$VIF, na.rm = TRUE), 3), "\n")
  cat("  Variables with VIF > 5:", sum(vif_df$VIF > 5, na.rm = TRUE), "\n")
}

# Model diagnostics
cat("\n=== MODEL DIAGNOSTICS ===\n")
model_issues <- check_model_issues(valid_models)
print(model_issues)

# Results summary
cat("\n=== MODEL RESULTS ===\n")
cat("Model: imc_dummy ~ population + population² + density + voting% + euros_per_habitant + (1|year) + (1|muni)\n\n")

modelsummary(
  valid_models,
  stars = TRUE
)

# Save results
modelsummary(
  valid_models,
  stars = TRUE,
  output = "population_density_models.docx"
)

cat("\n=== SUMMARY ===\n")
cat("Successfully fitted", length(valid_models), "models\n")
cat("Services:", paste(names(valid_models), collapse = ", "), "\n")
cat("Variables included: population, population², density, voting%, euros per habitant\n")
cat("Results saved to: population_density_models.docx\n")